import os
## Set directory
os.chdir('/hpc/group/pbenfeylab/CheWei/CW_data/genesys')
import networkx as nx
from genesys_evaluate_v1 import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
# Suppress all warning messages
warnings.filterwarnings("ignore", category=DeprecationWarning)
## Conda Env pytorch-gpu on DCC
print(torch.__version__)
print(sc.__version__)
1.13.0.post200 1.9.1
## Genes considered/used (shared among samples)
gene_list = pd.read_csv('./gene_list_1108.csv')
with open("./genesys_root_data.pkl", 'rb') as file_handle:
data = pickle.load(file_handle)
batch_size = 2000
dataset = Root_Dataset(data['X_test'], data['y_test'])
loader = DataLoader(dataset,
batch_size = batch_size,
shuffle = True, drop_last=True)
input_size = data['X_train'].shape[1]
## 10 cell types
output_size = 10
embedding_dim = 256
hidden_dim = 256
n_layers = 2
device = "cpu"
path = "./"
model = ClassifierLSTM(input_size, output_size, embedding_dim, hidden_dim, n_layers).to(device)
model.load_state_dict(torch.load(path+"best_ALL_1130_continue.pth", map_location=torch.device('cpu')))
model = model
model.eval()
ClassifierLSTM(
(fc1): Sequential(
(0): Linear(in_features=17513, out_features=256, bias=True)
(1): Dropout(p=0.2, inplace=False)
(2): GaussianNoise()
)
(fc): Sequential(
(0): ReLU()
(1): Linear(in_features=512, out_features=512, bias=True)
(2): ReLU()
(3): Linear(in_features=512, out_features=10, bias=True)
)
(lstm): LSTM(256, 256, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
(dropout): Dropout(p=0.2, inplace=False)
(b_to_z): DBlock(
(fc1): Linear(in_features=512, out_features=256, bias=True)
(fc2): Linear(in_features=512, out_features=256, bias=True)
(fc_mu): Linear(in_features=256, out_features=512, bias=True)
(fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
)
(bz2_infer_z1): DBlock(
(fc1): Linear(in_features=1024, out_features=256, bias=True)
(fc2): Linear(in_features=1024, out_features=256, bias=True)
(fc_mu): Linear(in_features=256, out_features=512, bias=True)
(fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
)
(z1_to_z2): DBlock(
(fc1): Linear(in_features=512, out_features=256, bias=True)
(fc2): Linear(in_features=512, out_features=256, bias=True)
(fc_mu): Linear(in_features=256, out_features=512, bias=True)
(fc_logsigma): Linear(in_features=256, out_features=512, bias=True)
)
(z_to_x): Decoder(
(fc1): Linear(in_features=512, out_features=256, bias=True)
(fc2): Linear(in_features=256, out_features=256, bias=True)
(fc3): Linear(in_features=256, out_features=17513, bias=True)
)
)
classes = ['Columella', 'Lateral Root Cap', 'Phloem', 'Xylem', 'Procambium', 'Pericycle', 'Endodermis', 'Cortex', 'Atrichoblast', 'Trichoblast']
class2num = {c: i for (i, c) in enumerate(classes)}
num2class = {i: c for (i, c) in enumerate(classes)}
cts = ['Atrichoblast','Trichoblast','Cortex','Endodermis','Pericycle','Procambium','Xylem','Phloem','Lateral Root Cap','Columella']
ctw = np.zeros((len(cts), 17513, 17513))
## number of cells sampled from the atlas
batch_size = 2000
## GRN for the transition t0 to t1
for ct in cts:
print(ct)
cws = np.zeros((len(loader), 17513, 17513))
with torch.no_grad():
for i, sample in enumerate(loader):
x = sample['x'].to(device)
y = sample['y'].to(device)
y_label = [num2class[i] for i in y.tolist()]
pred_h = model.init_hidden(batch_size)
tfrom = model.generate_current(x, pred_h, 0).to('cpu').detach().numpy()
cfrom = tfrom[np.where(np.array(y_label)==ct)[0],:]
pred_h = model.init_hidden(batch_size)
tto = model.generate_next(x, pred_h, 0).to('cpu').detach().numpy()
cto = tto[np.where(np.array(y_label)==ct)[0],:]
cw = torch.linalg.lstsq(torch.tensor(cfrom), torch.tensor(cto)).solution.detach().numpy()
cws[i] = cw
## Calculate mean across number of repeats
cwm = np.mean(cws, axis=0)
ctw[cts.index(ct)] = cwm
Atrichoblast Trichoblast Cortex Endodermis Pericycle Procambium Xylem Phloem Lateral Root Cap Columella
# Save the array to disk
np.save('genesys_ctw_t0-t1.npy', ctw)
ctw = np.load('genesys_ctw_t0-t1.npy')
## Calculate z-scores
ctw_z = np.zeros((len(cts), 17513, 17513))
for i in range(len(cts)):
ctw_z[i] = (ctw[i] - np.mean(ctw[i])) / np.std(ctw[i])
## Filtering based on z-scores (with no weights)
ctw_f = np.zeros((len(cts), 17513, 17513))
## z-score threshold (keep values > mean + threshold*std)
threshold=3
for i in range(len(cts)):
ctw_f[i] = np.abs(ctw_z[i]) > threshold
wanted_TFs = pd.read_csv("./Kay_TF_thalemine_annotations.csv")
## Make TF names unique and assign preferred names
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G33880"]="WOX9"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G45160"]="SCL27"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G04410"]="NAC78"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G29035"]="ORS1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02540"]="ZHD3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G16500"]="IAA26"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G09740"]="HAG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G24660"]="ZHD2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G46880"]="HDG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G28420"]="RLT1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G14580"]="BLJ"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G45260"]="BIB"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02070"]="RVN"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G28160"]="FIT"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G68360"]="GIS3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G20640"]="NLP4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G05550"]="VFP5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G59470"]="FRF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G15150"]="HAT7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G14750"]="WER"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G75710"]="BRON"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G74500"]="TMO7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G12646"]="RITF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G48100"]="ARR5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G16141"]="GATA17L"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G65640"]="NFL"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G62700"]="VND5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G36160"]="VND2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G66300"]="VND3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G12260"]="VND4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G62380"]="VND6"
pd.Series(wanted_TFs['Name']).value_counts().head(5)
NAC001 1 PRE5 1 MYB118 1 MYB21 1 MYB0 1 Name: Name, dtype: int64
TFidx = []
for i in wanted_TFs['GeneID']:
if i in gene_list['features'].tolist():
TFidx.append(np.where(gene_list['features']==i)[0][0])
TFidx = np.sort(np.array(TFidx))
def network(i):
## No weights
adj_nw = ctw_f[i]
## Weighted
adj = ctw[i]*ctw_f[i]
## TF only
adj = adj[np.ix_(TFidx,TFidx)]
adj_nw = adj_nw[np.ix_(TFidx,TFidx)]
## Remove no connect
regidx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[0]).value_counts().index[pd.Series(np.where(adj_nw==True)[0]).value_counts()>=1]))
taridx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[1]).value_counts().index[pd.Series(np.where(adj_nw==True)[1]).value_counts()>=1]))
## Reciprocol
keepidx = np.sort(np.array(list(set(regidx).intersection(taridx))))
#keepidx = np.sort(np.array(list(set(regidx).union(taridx))))
TFID = np.array(gene_list['features'][TFidx])[keepidx].tolist()
## TF name to keep
TFname = []
for i in np.array(gene_list['features'][TFidx])[keepidx]:
TFname.append(wanted_TFs['Name'][np.where(wanted_TFs['GeneID']==i)[0][0]])
adj = adj[np.ix_(keepidx,keepidx)]
# Create a NetworkX graph for non-directed edges
G = nx.Graph() # supports directed edges and allows for multiple edges between the same pair of nodes
# Add nodes to the graph
num_nodes = adj.shape[0]
for i, name in enumerate(TFname):
G.add_node(i, name=name)
# Add edges to the graph with weights
for i in range(num_nodes):
for j in range(num_nodes):
weight = adj[i, j]
if weight != 0:
G.add_edge(i, j, weight=abs(weight), distance=1/abs(weight))
## Measures the extent to which how close a node is to all other nodes in the network, considering the shortest paths or geodesic distances between nodes
closeness_centrality = nx.closeness_centrality(G, distance='distance')
## Measures the extent to which a node that are not only well-connected but also connected to other well-connected nodes.
eigenvector_centrality = nx.eigenvector_centrality(G)
# Create a NetworkX graph for diected edges
G = nx.MultiDiGraph() # supports directed edges and allows for multiple edges between the same pair of nodes
# Add nodes to the graph
num_nodes = adj.shape[0]
for i, name in enumerate(TFname):
G.add_node(i, name=name)
# Add edges to the graph with weights
for i in range(num_nodes):
for j in range(num_nodes):
weight = adj[i, j]
if weight != 0:
G.add_edge(i, j, weight=weight)
## Measures the number of connections (edges) each node has
degree_centrality = nx.degree_centrality(G)
# Calculate outgoing centrality
out_centrality = nx.out_degree_centrality(G)
# Calculate incoming centrality
in_centrality = nx.in_degree_centrality(G)
## Measures the extent to which a node lies on the shortest paths between other nodes.
betweenness_centrality = nx.betweenness_centrality(G, weight='weight')
## Non_Reciprocal Out centrality
# Visualize the graph
pos = nx.spring_layout(G) # Positions of the nodes
# Node colors based on weighted betweenness centrality
node_colors = [out_centrality[node] for node in G.nodes()]
# Node sizes based on weighted betweenness centrality
node_sizes = [out_centrality[node] * 1000 for node in G.nodes()]
# Get the edge weights as a dictionary
edge_weights = nx.get_edge_attributes(G, 'weight')
edge_colors = ['red' if weight > 0 else 'blue' for (_, _, weight) in G.edges(data='weight')]
# Scale the edge weights to desired linewidths
max_weight = max(edge_weights.values())
edge_widths = [float(edge_weights[edge]) / max_weight for edge in G.edges]
# Draw the graph
nx.draw(G, pos=pos, node_color=node_colors, node_size=node_sizes, with_labels=False, width=edge_widths, edge_color=edge_colors)
# Add node labels
labels = {node: G.nodes[node]['name'] for node in G.nodes}
nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=8)
# Add a colorbar to show the weighted betweenness centrality color mapping
sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=min(node_colors), vmax=max(node_colors)))
sm.set_array([])
plt.colorbar(sm)
# Show the plot
plt.show()
dc = pd.DataFrame.from_dict(degree_centrality, orient='index', columns=['degree_centrality'])
oc = pd.DataFrame.from_dict(out_centrality, orient='index', columns=['out_centrality'])
ic = pd.DataFrame.from_dict(in_centrality, orient='index', columns=['in_centrality'])
bc = pd.DataFrame.from_dict(betweenness_centrality, orient='index', columns=['betweenness_centrality'])
cc = pd.DataFrame.from_dict(closeness_centrality, orient='index', columns=['closeness_centrality'])
ec = pd.DataFrame.from_dict(eigenvector_centrality, orient='index', columns=['eigenvector_centrality'])
df = pd.concat([dc,oc,ic,bc,cc,ec], axis=1)
df.index =TFname
df = df.sort_values('betweenness_centrality', ascending=False)
return(df)
atri = network(0)
atri
| degree_centrality | out_centrality | in_centrality | betweenness_centrality | closeness_centrality | eigenvector_centrality | |
|---|---|---|---|---|---|---|
| HDA3 | 0.858357 | 0.056657 | 0.801700 | 0.993578 | 0.002648 | 0.118494 |
| BRON | 0.742210 | 0.169972 | 0.572238 | 0.991839 | 0.002406 | 0.112570 |
| AT2G35605 | 1.025496 | 0.280453 | 0.745042 | 0.988355 | 0.002503 | 0.124640 |
| NAC094 | 0.770538 | 0.283286 | 0.487252 | 0.987437 | 0.002262 | 0.109741 |
| SMB | 0.733711 | 0.201133 | 0.532578 | 0.986005 | 0.002383 | 0.107298 |
| ... | ... | ... | ... | ... | ... | ... |
| CCA1 | 0.011331 | 0.002833 | 0.008499 | 0.000000 | 0.000914 | 0.004676 |
| IDD2 | 0.062323 | 0.036827 | 0.025496 | 0.000000 | 0.001067 | 0.019845 |
| AT2G36930 | 0.175637 | 0.031161 | 0.144476 | 0.000000 | 0.001792 | 0.043830 |
| NAC084 | 0.110482 | 0.093484 | 0.016997 | 0.000000 | 0.001513 | 0.030022 |
| BPC5 | 0.011331 | 0.005666 | 0.005666 | 0.000000 | 0.000809 | 0.003438 |
354 rows × 6 columns
tri = network(1)
cor = network(2)
end = network(3)
per = network(4)
pro = network(5)
xyl = network(6)
phl = network(7)
lrc = network(8)
col = network(9)
atri.columns = ['atri_degree_centrality','atri_out_centrality','atri_in_centrality','atri_betweenness_centrality','atri_closeness_centrality','atri_eigenvector_centrality']
tri.columns = ['tri_degree_centrality','tri_out_centrality','tri_in_centrality','tri_betweenness_centrality','tri_closeness_centrality','tri_eigenvector_centrality']
cor.columns = ['cor_degree_centrality','cor_out_centrality','cor_in_centrality','cor_betweenness_centrality','cor_closeness_centrality','cor_eigenvector_centrality']
end.columns = ['end_degree_centrality','end_out_centrality','end_in_centrality','end_betweenness_centrality','end_closeness_centrality','end_eigenvector_centrality']
per.columns = ['per_degree_centrality','per_out_centrality','per_in_centrality','per_betweenness_centrality','per_closeness_centrality','per_eigenvector_centrality']
pro.columns = ['pro_degree_centrality','pro_out_centrality','pro_in_centrality','pro_betweenness_centrality','pro_closeness_centrality','pro_eigenvector_centrality']
xyl.columns = ['xyl_degree_centrality','xyl_out_centrality','xyl_in_centrality','xyl_betweenness_centrality','xyl_closeness_centrality','xyl_eigenvector_centrality']
phl.columns = ['phl_degree_centrality','phl_out_centrality','phl_in_centrality','phl_betweenness_centrality','phl_closeness_centrality','phl_eigenvector_centrality']
lrc.columns = ['lrc_degree_centrality','lrc_out_centrality','lrc_in_centrality','lrc_betweenness_centrality','lrc_closeness_centrality','lrc_eigenvector_centrality']
col.columns = ['col_degree_centrality','col_out_centrality','col_in_centrality','col_betweenness_centrality','col_closeness_centrality','col_eigenvector_centrality']
## Indentify main regulators in each net work
tff = []
tff = tff + atri[atri['atri_betweenness_centrality']>0].index.tolist()
tff = tff + tri[tri['tri_betweenness_centrality']>0].index.tolist()
tff = tff + lrc[lrc['lrc_betweenness_centrality']>0].index.tolist()
tff = tff + cor[cor['cor_betweenness_centrality']>0].index.tolist()
tff = tff + end[end['end_betweenness_centrality']>0].index.tolist()
tff = tff + per[per['per_betweenness_centrality']>0].index.tolist()
tff = tff + pro[pro['pro_betweenness_centrality']>0].index.tolist()
tff = tff + xyl[xyl['xyl_betweenness_centrality']>0].index.tolist()
tff = tff + phl[phl['phl_betweenness_centrality']>0].index.tolist()
tff = tff + col[col['col_betweenness_centrality']>0].index.tolist()
tf_occurance = pd.DataFrame(pd.Series(tff).value_counts(), columns=['tf_occurance'])
tf_spec = pd.concat([tf_occurance, atri, tri, lrc, cor, end, per, pro, xyl, phl, col], axis=1)
tf_spec = tf_spec.fillna(0)
## Epidermis (atri, tri, lrc)
celltype1='atri'
celltype2='tri'
celltype3='lrc'
ts = tf_spec[tf_spec['tf_occurance']==3][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==9].sort_values(['centrality_count','centrality_sum'], ascending=False)
| atri_betweenness_centrality | tri_betweenness_centrality | lrc_betweenness_centrality | atri_out_centrality | tri_out_centrality | lrc_out_centrality | atri_in_centrality | tri_in_centrality | lrc_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| WRKY27 | 0.161690 | 0.114025 | 0.007362 | 0.150142 | 0.164875 | 0.084746 | 0.053824 | 0.150538 | 0.072639 | 9 | 9.959841 |
| AT2G36026 | 0.005127 | 0.008690 | 0.011545 | 0.189802 | 0.157706 | 0.107748 | 0.062323 | 0.129032 | 0.150121 | 9 | 9.822093 |
## atri, tri
celltype1='atri'
celltype2='tri'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
| atri_betweenness_centrality | tri_betweenness_centrality | atri_out_centrality | tri_out_centrality | atri_in_centrality | tri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|
| GL2 | 0.728424 | 0.966736 | 0.201133 | 0.336918 | 0.294618 | 0.387097 | 6 | 8.914925 |
| RHD6 | 0.000008 | 0.885047 | 0.127479 | 0.473118 | 0.039660 | 0.164875 | 6 | 7.690186 |
| TTG2 | 0.168604 | 0.645548 | 0.059490 | 0.086022 | 0.084986 | 0.200717 | 6 | 7.245366 |
## Atrichoblast specific
celltype = 'atri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
| atri_betweenness_centrality | atri_out_centrality | atri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| IDD7 | 0.181424 | 0.444759 | 0.087819 | 3 | 3.714002 |
| GRF3 | 0.005191 | 0.087819 | 0.031161 | 3 | 3.124171 |
| HTA2 | 0.001602 | 0.161473 | 0.076487 | 3 | 3.239562 |
## Trichoblast specific
celltype = 'tri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
| tri_betweenness_centrality | tri_out_centrality | tri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| AT3G53370 | 0.984168 | 0.096774 | 0.215054 | 3 | 4.295995 |
| WRKY9 | 0.982749 | 0.189964 | 0.394265 | 3 | 4.566979 |
| IAA14 | 0.861388 | 0.057348 | 0.075269 | 3 | 3.994005 |
| AT1G61990 | 0.591076 | 0.215054 | 0.071685 | 3 | 3.877814 |
| WRKY72 | 0.302571 | 0.451613 | 0.132616 | 3 | 3.886800 |
| AT1G03650 | 0.099533 | 0.129032 | 0.075269 | 3 | 3.303834 |
| GL3 | 0.011372 | 0.344086 | 0.060932 | 3 | 3.416389 |
| AT5G06110 | 0.008883 | 0.039427 | 0.057348 | 3 | 3.105657 |
| MYB47 | 0.003197 | 0.014337 | 0.021505 | 3 | 3.039040 |
| MBD6 | 0.000026 | 0.236559 | 0.082437 | 3 | 3.319022 |
## LRC specific
celltype = 'lrc'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
| lrc_betweenness_centrality | lrc_out_centrality | lrc_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| MYB3R-4 | 0.393633 | 0.127119 | 0.015738 | 3 | 3.536490 |
| AT1G02670 | 0.009110 | 0.037530 | 0.035109 | 3 | 3.081749 |
| AT1G76580 | 0.003632 | 0.025424 | 0.127119 | 3 | 3.156174 |
| WRKY65 | 0.001419 | 0.013317 | 0.024213 | 3 | 3.038949 |
| AT3G19360 | 0.001360 | 0.141646 | 0.029056 | 3 | 3.172063 |
| AT2G34450 | 0.001224 | 0.035109 | 0.004843 | 3 | 3.041175 |
| TFIIIA | 0.001211 | 0.001211 | 0.065375 | 3 | 3.067797 |
| ATE2F2 | 0.001208 | 0.018160 | 0.004843 | 3 | 3.024210 |
| AT3G48600 | 0.001140 | 0.012107 | 0.033898 | 3 | 3.047145 |
| AT5G41020 | 0.000544 | 0.042373 | 0.023002 | 3 | 3.065920 |
| AT1G72030 | 0.000186 | 0.052058 | 0.029056 | 3 | 3.081300 |
| AT5G19490 | 0.000170 | 0.066586 | 0.018160 | 3 | 3.084916 |
| MBD5 | 0.000144 | 0.055690 | 0.007264 | 3 | 3.063098 |
| TRP1 | 0.000054 | 0.076271 | 0.019370 | 3 | 3.095696 |
| BRM | 0.000003 | 0.039952 | 0.069007 | 3 | 3.108962 |
## Columella specific
celltype = 'col'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
| col_betweenness_centrality | col_out_centrality | col_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| AT1G70000 | 0.403795 | 0.087515 | 0.075846 | 3 | 3.567156 |
| HMG | 0.343436 | 0.105018 | 0.071179 | 3 | 3.519632 |
| AT3G02890 | 0.154368 | 0.026838 | 0.179697 | 3 | 3.360902 |
| ARF6 | 0.126919 | 0.381564 | 0.302217 | 3 | 3.810700 |
| HAM3 | 0.091571 | 0.017503 | 0.017503 | 3 | 3.126577 |
| MYB14 | 0.021393 | 0.016336 | 0.028005 | 3 | 3.065734 |
| TTG1 | 0.018636 | 0.026838 | 0.012835 | 3 | 3.058309 |
| ERF3 | 0.018472 | 0.045508 | 0.095683 | 3 | 3.159662 |
| IDD1 | 0.016494 | 0.091015 | 0.161027 | 3 | 3.268536 |
| NTT | 0.010256 | 0.275379 | 0.192532 | 3 | 3.478168 |
| AT3G16280 | 0.003157 | 0.221704 | 0.032672 | 3 | 3.257533 |
| AT2G47890 | 0.002889 | 0.019837 | 0.026838 | 3 | 3.049563 |
| ALY2 | 0.001772 | 0.037340 | 0.101517 | 3 | 3.140629 |
| WRKY7 | 0.001519 | 0.037340 | 0.026838 | 3 | 3.065696 |
| AT1G05120 | 0.001167 | 0.074679 | 0.060677 | 3 | 3.136523 |
| LOL2 | 0.001165 | 0.004667 | 0.011669 | 3 | 3.017502 |
| AT2G47090 | 0.001080 | 0.010502 | 0.024504 | 3 | 3.036085 |
| ZFP6 | 0.001005 | 0.299883 | 0.116686 | 3 | 3.417574 |
| MBD7 | 0.000568 | 0.136523 | 0.031505 | 3 | 3.168596 |
| AT4G27900 | 0.000484 | 0.154026 | 0.053676 | 3 | 3.208185 |
| AT1G76350 | 0.000044 | 0.166861 | 0.031505 | 3 | 3.198410 |
| AT3G20010 | 0.000012 | 0.030338 | 0.079347 | 3 | 3.109697 |
| E2F1 | 0.000004 | 0.025671 | 0.051342 | 3 | 3.077017 |
| ARR6 | 0.000004 | 0.145858 | 0.015169 | 3 | 3.161031 |
| VRN1 | 0.000003 | 0.024504 | 0.022170 | 3 | 3.046677 |
## Ground tissue
celltype1='cor'
celltype2='end'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
| cor_betweenness_centrality | end_betweenness_centrality | cor_out_centrality | end_out_centrality | cor_in_centrality | end_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|
| SCR | 0.837784 | 0.665611 | 0.199219 | 0.156171 | 0.125000 | 0.158690 | 6 | 8.142475 |
| MYB36 | 0.859631 | 0.443688 | 0.083984 | 0.171285 | 0.171875 | 0.234257 | 6 | 7.964720 |
| MYB68 | 0.837684 | 0.401083 | 0.093750 | 0.166247 | 0.125000 | 0.171285 | 6 | 7.795048 |
| 3xHMG-box1 | 0.104035 | 0.000706 | 0.164062 | 0.251889 | 0.109375 | 0.108312 | 6 | 6.738381 |
| PPD1 | 0.033822 | 0.014159 | 0.035156 | 0.206549 | 0.111328 | 0.130982 | 6 | 6.531997 |
| AT1G16640 | 0.001953 | 0.023720 | 0.117188 | 0.123426 | 0.115234 | 0.133501 | 6 | 6.515022 |
| MYB34 | 0.021679 | 0.002926 | 0.111328 | 0.138539 | 0.017578 | 0.020151 | 6 | 6.312202 |
## Cortex specific
celltype = 'cor'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
| cor_betweenness_centrality | cor_out_centrality | cor_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| RVN | 0.457857 | 0.037109 | 0.070312 | 3 | 3.565279 |
| AT2G18850 | 0.244313 | 0.105469 | 0.035156 | 3 | 3.384938 |
| AT3G55080 | 0.113897 | 0.136719 | 0.101562 | 3 | 3.352178 |
| AT5G58280 | 0.032649 | 0.103516 | 0.050781 | 3 | 3.186946 |
| ELO3 | 0.003042 | 0.044922 | 0.029297 | 3 | 3.077261 |
| AT3G04850 | 0.000837 | 0.117188 | 0.019531 | 3 | 3.137556 |
## Endodermis specific
celltype = 'end'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
| end_betweenness_centrality | end_out_centrality | end_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| NF-YB11 | 0.139722 | 0.010076 | 0.057935 | 3 | 3.207732 |
| AT5G23930 | 0.000013 | 0.267003 | 0.078086 | 3 | 3.345101 |
## Stele
celltype1='per'
celltype2='pro'
celltype3='xyl'
celltype4='phl'
ts = tf_spec[tf_spec['tf_occurance']==4][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype4+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype4+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality', celltype4+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==12].sort_values(['centrality_count','centrality_sum'], ascending=False)
| per_betweenness_centrality | pro_betweenness_centrality | xyl_betweenness_centrality | phl_betweenness_centrality | per_out_centrality | pro_out_centrality | xyl_out_centrality | phl_out_centrality | per_in_centrality | pro_in_centrality | xyl_in_centrality | phl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AT3G43430 | 0.878139 | 0.889922 | 0.386851 | 0.340981 | 0.120192 | 0.752967 | 0.124764 | 0.919836 | 0.218750 | 0.771305 | 0.279773 | 0.924974 | 12 | 18.608454 |
| BZIP61 | 0.101489 | 0.748565 | 0.773121 | 0.990170 | 0.153846 | 0.686084 | 0.349716 | 0.345324 | 0.194712 | 0.193096 | 0.332703 | 0.343268 | 12 | 17.212094 |
| MYB20 | 0.828545 | 0.067231 | 0.051183 | 0.974602 | 0.038462 | 0.428263 | 0.289225 | 0.217883 | 0.266827 | 0.482201 | 0.260870 | 0.264132 | 12 | 16.169422 |
| AT5G51780 | 0.781267 | 0.003206 | 0.487022 | 0.782528 | 0.170673 | 0.240561 | 0.395085 | 0.204522 | 0.173077 | 0.159655 | 0.285444 | 0.168551 | 12 | 15.851592 |
| HB40 | 0.000962 | 0.043132 | 0.877442 | 0.100809 | 0.233173 | 0.221143 | 0.413989 | 0.271326 | 0.093750 | 0.073355 | 0.527410 | 0.367934 | 12 | 15.224425 |
| NF-YB2 | 0.027375 | 0.317524 | 0.594242 | 0.045043 | 0.471154 | 0.238403 | 0.185255 | 0.205550 | 0.199519 | 0.140237 | 0.482042 | 0.188078 | 12 | 15.094423 |
| AT3G06590 | 0.013102 | 0.717531 | 0.001113 | 0.008457 | 0.096154 | 0.271845 | 0.128544 | 0.173690 | 0.240385 | 0.187702 | 0.069943 | 0.189106 | 12 | 14.097572 |
| IDD14 | 0.002218 | 0.565865 | 0.012108 | 0.001116 | 0.084135 | 0.154261 | 0.149338 | 0.154162 | 0.158654 | 0.186624 | 0.079395 | 0.187050 | 12 | 13.734927 |
| AT1G64620 | 0.043646 | 0.001445 | 0.249234 | 0.000088 | 0.122596 | 0.097087 | 0.342155 | 0.038027 | 0.146635 | 0.058252 | 0.264650 | 0.058582 | 12 | 13.422396 |
| bZIP4 | 0.016039 | 0.104889 | 0.001099 | 0.002131 | 0.228365 | 0.180151 | 0.102079 | 0.127441 | 0.139423 | 0.045307 | 0.043478 | 0.093525 | 12 | 13.083929 |
## Pericycle
celltype = 'per'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
| per_betweenness_centrality | per_out_centrality | per_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| LBD39 | 0.002549 | 0.038462 | 0.060096 | 3 | 3.101106 |
| ERF10 | 0.000023 | 0.115385 | 0.069712 | 3 | 3.185119 |
## Procambium
celltype = 'pro'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
| pro_betweenness_centrality | pro_out_centrality | pro_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| GBF6 | 0.832425 | 0.190939 | 0.579288 | 3 | 4.602651 |
| AT2G20100 | 0.734506 | 0.212513 | 0.117584 | 3 | 4.064603 |
| GATA23 | 0.653813 | 0.247033 | 0.096009 | 3 | 3.996855 |
| AT4G29930 | 0.637058 | 0.148867 | 0.221143 | 3 | 4.007069 |
| TEM1 | 0.553198 | 0.062567 | 0.210356 | 3 | 3.826121 |
| RD26 | 0.535346 | 0.102481 | 0.243797 | 3 | 3.881624 |
| AT4G20970 | 0.485725 | 0.112190 | 0.066882 | 3 | 3.664797 |
| AT5G61590 | 0.402557 | 0.162891 | 0.093851 | 3 | 3.659299 |
| SCL1 | 0.390224 | 0.038835 | 0.080906 | 3 | 3.509965 |
| TRFL10 | 0.357408 | 0.096009 | 0.080906 | 3 | 3.534323 |
| LBD29 | 0.283109 | 0.079827 | 0.065804 | 3 | 3.428740 |
| MYBR1 | 0.159231 | 0.080906 | 0.116505 | 3 | 3.356642 |
| NAC089 | 0.084540 | 0.069040 | 0.057174 | 3 | 3.210753 |
| BOP2 | 0.072036 | 0.020496 | 0.025890 | 3 | 3.118422 |
| NAC083 | 0.032846 | 0.081985 | 0.070119 | 3 | 3.184949 |
| HB5 | 0.002549 | 0.139159 | 0.019417 | 3 | 3.161125 |
| NAC080 | 0.002035 | 0.029126 | 0.144552 | 3 | 3.175714 |
| SCL3 | 0.001349 | 0.053937 | 0.033441 | 3 | 3.088728 |
| LAS | 0.001272 | 0.067961 | 0.030205 | 3 | 3.099438 |
| WRKY19 | 0.001079 | 0.021575 | 0.094930 | 3 | 3.117584 |
| ERF7 | 0.001078 | 0.064725 | 0.084142 | 3 | 3.149945 |
| AT1G77570 | 0.000853 | 0.072276 | 0.037756 | 3 | 3.110885 |
| AT4G35270 | 0.000811 | 0.033441 | 0.009709 | 3 | 3.043961 |
| SAC51 | 0.000455 | 0.064725 | 0.102481 | 3 | 3.167662 |
| AT5G58620 | 0.000398 | 0.036677 | 0.065804 | 3 | 3.102880 |
| LRP1 | 0.000016 | 0.059331 | 0.050701 | 3 | 3.110049 |
## Xylem
celltype = 'xyl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
| xyl_betweenness_centrality | xyl_out_centrality | xyl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| MYB46 | 0.980262 | 0.298677 | 0.712665 | 3 | 4.991604 |
| XND1 | 0.980019 | 0.387524 | 0.485822 | 3 | 4.853365 |
| VND7 | 0.953475 | 0.519849 | 0.495274 | 3 | 4.968598 |
| AT1G68200 | 0.921733 | 0.251418 | 0.413989 | 3 | 4.587139 |
| HB31 | 0.920036 | 0.111531 | 0.310019 | 3 | 4.341586 |
| ... | ... | ... | ... | ... | ... |
| FBH3 | 0.000823 | 0.088847 | 0.058601 | 3 | 3.148271 |
| OFP1 | 0.000494 | 0.051040 | 0.086957 | 3 | 3.138490 |
| AT3G10760 | 0.000412 | 0.069943 | 0.098299 | 3 | 3.168654 |
| AT2G27930 | 0.000354 | 0.066163 | 0.064272 | 3 | 3.130789 |
| OBP4 | 0.000179 | 0.102079 | 0.103970 | 3 | 3.206228 |
63 rows × 5 columns
## Phloem
celltype = 'phl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
| phl_betweenness_centrality | phl_out_centrality | phl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| REM22 | 0.951334 | 0.139774 | 0.195272 | 3 | 4.286380 |
| NAC2 | 0.941397 | 0.130524 | 0.196300 | 3 | 4.268221 |
| APL | 0.872514 | 0.689620 | 0.767729 | 3 | 5.329862 |
| AT3G12730 | 0.866460 | 0.236382 | 0.411100 | 3 | 4.513942 |
| NAC086 | 0.717771 | 0.054471 | 0.089414 | 3 | 3.861656 |
| ... | ... | ... | ... | ... | ... |
| AT1G63840 | 0.000002 | 0.054471 | 0.066804 | 3 | 3.121277 |
| AT5G12850 | 0.000001 | 0.055498 | 0.045221 | 3 | 3.100720 |
| AT1G76870 | 0.000001 | 0.040082 | 0.013361 | 3 | 3.053444 |
| SDG20 | 0.000001 | 0.010277 | 0.008222 | 3 | 3.018501 |
| RGA1 | 0.000001 | 0.070915 | 0.061665 | 3 | 3.132581 |
81 rows × 5 columns
gene = 'SHR'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
| tf_occurance | atri_degree_centrality | atri_out_centrality | atri_in_centrality | atri_closeness_centrality | atri_eigenvector_centrality | cor_degree_centrality | cor_out_centrality | cor_in_centrality | cor_betweenness_centrality | ... | xyl_out_centrality | xyl_in_centrality | xyl_betweenness_centrality | xyl_closeness_centrality | xyl_eigenvector_centrality | phl_degree_centrality | phl_out_centrality | phl_in_centrality | phl_closeness_centrality | phl_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SHR | 5.0 | 0.167139 | 0.155807 | 0.011331 | 0.001709 | 0.046376 | 0.162109 | 0.115234 | 0.046875 | 0.680219 | ... | 0.334594 | 0.294896 | 0.438302 | 0.000752 | 0.082956 | 0.113052 | 0.083248 | 0.029805 | 0.00032 | 0.031782 |
1 rows × 41 columns
gene = 'BLJ'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
gene = 'JKD'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
| tf_occurance | cor_degree_centrality | cor_out_centrality | cor_in_centrality | cor_betweenness_centrality | cor_closeness_centrality | cor_eigenvector_centrality | end_degree_centrality | end_out_centrality | end_in_centrality | end_betweenness_centrality | end_closeness_centrality | end_eigenvector_centrality | col_degree_centrality | col_out_centrality | col_in_centrality | col_betweenness_centrality | col_closeness_centrality | col_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| JKD | 3.0 | 0.230469 | 0.017578 | 0.212891 | 0.176454 | 0.001429 | 0.057212 | 0.382872 | 0.007557 | 0.375315 | 0.147953 | 0.001485 | 0.078534 | 0.171529 | 0.010502 | 0.161027 | 0.738642 | 0.000553 | 0.051131 |
gene = 'RVN'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
| tf_occurance | cor_degree_centrality | cor_out_centrality | cor_in_centrality | cor_betweenness_centrality | cor_closeness_centrality | cor_eigenvector_centrality | end_degree_centrality | end_out_centrality | end_in_centrality | end_closeness_centrality | end_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| RVN | 1.0 | 0.107422 | 0.037109 | 0.070312 | 0.457857 | 0.001278 | 0.033988 | 0.153652 | 0.025189 | 0.128463 | 0.001301 | 0.043652 |
gene = 'BIB'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
| end_degree_centrality | end_out_centrality | end_in_centrality | end_closeness_centrality | end_eigenvector_centrality | |
|---|---|---|---|---|---|
| BIB | 0.035264 | 0.007557 | 0.027708 | 0.001034 | 0.012088 |
gene = 'IME'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
gene = 'MYB66'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
gene = 'GL2'
tf_spec[tf_spec.index==gene][tf_spec[tf_spec.index==gene].columns[tf_spec[tf_spec.index==gene].any()]]
| tf_occurance | atri_degree_centrality | atri_out_centrality | atri_in_centrality | atri_betweenness_centrality | atri_closeness_centrality | atri_eigenvector_centrality | tri_degree_centrality | tri_out_centrality | tri_in_centrality | ... | cor_degree_centrality | cor_out_centrality | cor_in_centrality | cor_closeness_centrality | cor_eigenvector_centrality | end_degree_centrality | end_out_centrality | end_in_centrality | end_closeness_centrality | end_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| GL2 | 2.0 | 0.495751 | 0.201133 | 0.294618 | 0.728424 | 0.002126 | 0.086958 | 0.724014 | 0.336918 | 0.387097 | ... | 0.087891 | 0.068359 | 0.019531 | 0.001053 | 0.029496 | 0.110831 | 0.093199 | 0.017632 | 0.001138 | 0.033544 |
1 rows × 28 columns
tf_spec.to_csv('TF_GRN_centrality_t0-t1_zscore3.csv', index=True)